This is the coin flip problem, it is all over the book



In [3]:

    
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import seaborn as sns
import pymc3 as pm

%matplotlib inline
sns.set(font_scale=1.5)

Chaper 1

page 18



In [4]:

    
theta_real = 0.35
trials = [0, 1, 2, 3, 4, 8, 16, 32, 50, 150]
data = [0, 1, 1, 1, 1, 4, 6, 9, 13, 48]
beta_params = [(1, 1), (0.5, 0.5), (20, 20)]



In [5]:

    
plt.figure(figsize=(10,12))

dist = stats.beta
x = np.linspace(0, 1, 100)
for idx, N in enumerate(trials):
    if idx == 0:
        plt.subplot(4,3, 2)
    else:
        plt.subplot(4,3, idx+3)
    y = data[idx]
    for (a_prior, b_prior), c in zip(beta_params, ('b', 'r', 'g')):
        p_theta_given_y = dist.pdf(x, a_prior + y, b_prior + N - y)
        plt.plot(x, p_theta_given_y, c)
        plt.fill_between(x, 0, p_theta_given_y, color=c, alpha=0.6)
    plt.axvline(theta_real, ymax=0.3, color='k')
    plt.plot(0, 0, label="{:d} experiments\n{:d} heads".format(N,y), alpha=0)
    plt.xlim(0,1)
    plt.ylim(0,12)
    plt.xlabel(r'$\theta$')
    plt.legend()
    plt.gca().axes.get_yaxis().set_visible(False)
plt.tight_layout()

Blue is uniform prior
red has more 1, 0 than uniform
green is centered around 0.5, we think we know the answer

Solve using a grid method

ch2 page 34



In [6]:

    
def posterior_grid(grid_points=100, heads=6, tosses=9):
    """
    A grid implementation for the coin-flip problem
    """
    grid = np.linspace(0, 1, grid_points)
    prior = np.repeat(1, grid_points)
    likelihood = stats.binom.pmf(heads, tosses, grid)
    unstd_posterior = likelihood * prior
    posterior = unstd_posterior / unstd_posterior.sum()
    return grid, posterior



In [7]:

    
#Assuming we made 4 tosses and we observe only 1 head we have the following:
points = 15
h, n = 1, 4
grid, posterior = posterior_grid(points, h, n)
plt.plot(grid, posterior, 'o-', label='heads = {}\ntosses = {}'.format(h, n))
plt.xlabel(r'$\theta$')
plt.legend(loc=0)









    Out[7]:





<matplotlib.legend.Legend at 0x7ff0a06cc290>



In [8]:

    
#Assuming we made 40 tosses and we observe only 1 head we have the following:
points = 15
h, n = 1, 40
grid, posterior = posterior_grid(points, h, n)
plt.plot(grid, posterior, 'o-', label='heads = {}\ntosses = {}'.format(h, n))
plt.xlabel(r'$\theta$')
plt.legend(loc=0)









    Out[8]:





<matplotlib.legend.Legend at 0x7ff088378210>



In [9]:

    
#Assuming we made 40 tosses and we observe 24 head we have the following:
points = 15
h, n = 24, 40
grid, posterior = posterior_grid(points, h, n)
plt.plot(grid, posterior, 'o-', label='heads = {}\ntosses = {}'.format(h, n))
plt.xlabel(r'$\theta$')
plt.legend(loc=0)

plt.figure()
points = 150
h, n = 24, 40
grid, posterior = posterior_grid(points, h, n)
plt.plot(grid, posterior, 'o-', label='heads = {}\ntosses = {}'.format(h, n))
plt.xlabel(r'$\theta$')
plt.legend(loc=0)









    Out[9]:





<matplotlib.legend.Legend at 0x7ff0883ece10>

Chapter 2

Coin flip pymc3



In [10]:

    
np.random.seed(123)
n_experiments = 4
theta_real = 0.35
data = stats.bernoulli.rvs(p=theta_real, size=n_experiments)
print(data)



In [11]:

    
XX = np.linspace(0,1,100)
plt.plot(XX, stats.beta(1,1).pdf(XX))









    Out[11]:





[<matplotlib.lines.Line2D at 0x7ff0c0f114d0>]



In [12]:

    
with pm.Model() as our_first_model:
    theta = pm.Beta('theta', alpha=1, beta=1)
    y = pm.Bernoulli('y', p=theta, observed=data)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(1000, step=step, start=start, chains=4)









    



/Users/balarsen/anaconda3/envs/python3/lib/python3.7/site-packages/pymc3/tuning/starting.py:61: UserWarning: find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way.
  warnings.warn('find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way.')
logp = -2.7726, ||grad|| = 1: 100%|██████████| 6/6 [00:00<00:00, 1727.59it/s]
Multiprocess sampling (4 chains in 4 jobs)
Metropolis: [theta]
Sampling 4 chains: 100%|██████████| 6000/6000 [00:00<00:00, 7484.26draws/s]
The number of effective samples is smaller than 25% for some parameters.

Convergence checking

page 49



In [13]:

    
burnin = 100
chain = trace[burnin:]
ax = pm.traceplot(chain, lines={'theta':theta_real});
ax[0][0].axvline(theta_real, c='r')









    Out[13]:





<matplotlib.lines.Line2D at 0x7ff0e2f84b10>



In [14]:

    
theta_real









    Out[14]:





0.35



In [15]:

    
pm.gelman_rubin(chain) # want < 1.1









    Out[15]:





{'theta': 1.0071213766545901}



In [16]:

    
pm.forestplot(chain)









    Out[16]:





(<Figure size 432x316.8 with 1 Axes>,
 array([<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0c13e5390>],
       dtype=object))



In [17]:

    
pm.summary(trace)



In [18]:

    
pm.autocorrplot(trace)









    Out[18]:





array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0c15cf6d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7ff0e3fccb10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7ff0e3ceef50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7ff0e38f4110>]],
      dtype=object)



In [19]:

    
# a measure of eff n based on autocorrelecation
pm.effective_n(trace)









    Out[19]:





{'theta': 622.1273761248884}



In [20]:

    
# AKA Kruschke plot
pm.plot_posterior(trace)









    Out[20]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0e3b3ec10>],
      dtype=object)



In [21]:

    
pm.plot_posterior(trace, rope=[0.45, .55])









    Out[21]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0e3dc3790>],
      dtype=object)



In [22]:

    
pm.plot_posterior(trace, ref_val=0.50)









    Out[22]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0e4096950>],
      dtype=object)

Try pymc3 with a lot more data

Coin is clearly not fair at the 1000 flips level



In [23]:

    
data = stats.bernoulli.rvs(p=theta_real, size=1000) # 1000 flips in the data



In [24]:

    
with pm.Model() as our_first_model:
    theta = pm.Beta('theta', alpha=1, beta=1)
    y = pm.Bernoulli('y', p=theta, observed=data)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(10000, step=step, start=start, chains=4)









    



/Users/balarsen/anaconda3/envs/python3/lib/python3.7/site-packages/pymc3/tuning/starting.py:61: UserWarning: find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way.
  warnings.warn('find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way.')
logp = -693.15, ||grad|| = 158: 100%|██████████| 7/7 [00:00<00:00, 2352.20it/s]
Multiprocess sampling (4 chains in 4 jobs)
Metropolis: [theta]
Sampling 4 chains: 100%|██████████| 42000/42000 [00:05<00:00, 7733.99draws/s]
The number of effective samples is smaller than 25% for some parameters.



In [25]:

    
burnin = 100
chain = trace[burnin:]
ax = pm.traceplot(chain, lines={'theta':theta_real});
ax[0][0].axvline(theta_real, c='r')









    Out[25]:





<matplotlib.lines.Line2D at 0x7ff0c108f6d0>



In [26]:

    
pm.gelman_rubin(chain) # want < 1.1









    Out[26]:





{'theta': 1.000043320437209}



In [27]:

    
pm.forestplot(chain) # super tight range









    Out[27]:





(<Figure size 432x316.8 with 1 Axes>,
 array([<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0c1544250>],
       dtype=object))



In [28]:

    
pm.summary(trace)









    Out[28]:







  
    
      
      mean
      sd
      mc_error
      hpd_2.5
      hpd_97.5
      n_eff
      Rhat
    
  
  
    
      theta
      0.342285
      0.014578
      0.000207
      0.315488
      0.372248
      4809.592196
      1.000044



In [29]:

    
pm.autocorrplot(trace)









    Out[29]:





array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0e4140210>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7ff088b557d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7ff0e417b090>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7ff088b66b90>]],
      dtype=object)



In [30]:

    
pm.effective_n(trace)









    Out[30]:





{'theta': 4809.5921958194795}



In [31]:

    
pm.plot_posterior(trace, rope=[0.45, .55])









    Out[31]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0a08e2390>],
      dtype=object)



In [32]:

    
pm.plot_posterior(trace, ref_val=0.50)









    Out[32]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0c0af2a50>],
      dtype=object)

Try pymc3 with a lot more data

Coin is not fair at the 25 flips level (for these data)



In [33]:

    
data = stats.bernoulli.rvs(p=theta_real, size=25) # 25 flips in the data



In [34]:

    
with pm.Model() as our_first_model:
    theta = pm.Beta('theta', alpha=1, beta=1)
    y = pm.Bernoulli('y', p=theta, observed=data)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(10000, step=step, start=start, chains=4)









    



/Users/balarsen/anaconda3/envs/python3/lib/python3.7/site-packages/pymc3/tuning/starting.py:61: UserWarning: find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way.
  warnings.warn('find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way.')
logp = -17.329, ||grad|| = 6.5: 100%|██████████| 7/7 [00:00<00:00, 2563.08it/s]
Multiprocess sampling (4 chains in 4 jobs)
Metropolis: [theta]
Sampling 4 chains: 100%|██████████| 42000/42000 [00:05<00:00, 7731.51draws/s]
The number of effective samples is smaller than 25% for some parameters.



In [35]:

    
burnin = 100
chain = trace[burnin:]
ax = pm.traceplot(chain, lines={'theta':theta_real});
ax[0][0].axvline(theta_real, c='r')









    Out[35]:





<matplotlib.lines.Line2D at 0x7ff0b105f7d0>



In [36]:

    
pm.gelman_rubin(chain) # want < 1.1









    Out[36]:





{'theta': 1.0000285410022884}



In [37]:

    
pm.forestplot(chain) # super tight range









    Out[37]:





(<Figure size 432x316.8 with 1 Axes>,
 array([<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0c152a110>],
       dtype=object))



In [38]:

    
pm.summary(trace)









    Out[38]:







  
    
      
      mean
      sd
      mc_error
      hpd_2.5
      hpd_97.5
      n_eff
      Rhat
    
  
  
    
      theta
      0.259769
      0.082795
      0.000832
      0.103022
      0.418125
      8752.246218
      1.000019



In [39]:

    
pm.autocorrplot(trace)









    Out[39]:





array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0c187fdd0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7ff0c166bed0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7ff0e44012d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7ff0a0f7df10>]],
      dtype=object)



In [40]:

    
pm.effective_n(trace)









    Out[40]:





{'theta': 8752.246217874024}



In [41]:

    
pm.plot_posterior(trace, rope=[0.45, .55])









    Out[41]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x7ff088b7bdd0>],
      dtype=object)



In [42]:

    
pm.plot_posterior(trace, ref_val=0.50)









    Out[42]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0a101fa50>],
      dtype=object)



In [43]:

    
pm.plot_posterior(trace, ref_val=0.50, rope=[0.45, .55])









    Out[43]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0e45de890>],
      dtype=object)

Explore priors on the coin flip

Ex 2-5 page 59



In [44]:

    
np.random.seed(123)
n_experiments = 4
theta_real = 0.35
data = stats.bernoulli.rvs(p=theta_real, size=n_experiments)
print(data)



In [48]:

    
with pm.Model() as our_first_model:
    theta = pm.Beta('theta', alpha=1, beta=1)
    y = pm.Bernoulli('y', p=theta, observed=data)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(5000, step=step, start=start, chains=8)
pm.plot_posterior(trace, ref_val=0.50, rope=[0.45, .55])
plt.title("pm.Beta('theta', alpha=1, beta=1)")









    



/Users/balarsen/anaconda3/envs/python3/lib/python3.7/site-packages/pymc3/tuning/starting.py:61: UserWarning: find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way.
  warnings.warn('find_MAP should not be used to initialize the NUTS sampler, simply call pymc3.sample() and it will automatically initialize NUTS in a better way.')
logp = -2.7726, ||grad|| = 1: 100%|██████████| 6/6 [00:00<00:00, 2196.16it/s]
Multiprocess sampling (8 chains in 4 jobs)
Metropolis: [theta]
Sampling 8 chains: 100%|██████████| 44000/44000 [00:05<00:00, 7625.10draws/s]
The number of effective samples is smaller than 25% for some parameters.






    Out[48]:





Text(0.5, 1.0, "pm.Beta('theta', alpha=1, beta=1)")



In [53]:

    
with pm.Model() as our_first_model:
    theta = pm.Uniform('theta', .2, .4)
    y = pm.Bernoulli('y', p=theta, observed=data)
    step = pm.Metropolis()
    trace = pm.sample(5000, step=step, chains=8)
pm.plot_posterior(trace, ref_val=0.50, rope=[0.45, .55])
plt.title("pm.Uniform('theta', 0, 1)")









    



Multiprocess sampling (8 chains in 4 jobs)
Metropolis: [theta]
Sampling 8 chains: 100%|██████████| 44000/44000 [00:05<00:00, 7619.16draws/s]
The number of effective samples is smaller than 25% for some parameters.






    Out[53]:





Text(0.5, 1.0, "pm.Uniform('theta', 0, 1)")



In [50]:

    
with pm.Model() as our_first_model:
    theta = pm.Normal('theta', 0.35, 1)
    y = pm.Bernoulli('y', p=theta, observed=data)
    step = pm.Metropolis()
    trace = pm.sample(5000, step=step, chains=8)
pm.plot_posterior(trace, ref_val=0.50, rope=[0.45, .55])
plt.title("pm.Normal('theta', 0.35, 1)")









    



Multiprocess sampling (8 chains in 4 jobs)
Metropolis: [theta]
Sampling 8 chains: 100%|██████████| 44000/44000 [00:05<00:00, 7795.77draws/s]
The number of effective samples is smaller than 25% for some parameters.






    Out[50]:





Text(0.5, 1.0, "pm.Normal('theta', 0.35, 1)")



In [52]:

    
pm.plots.densityplot(trace, hpd_markers='v')









    Out[52]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x7ff0e4008f10>],
      dtype=object)



In [ ]:



In [ ]:



In [ ]:



In [ ]: